Calibrated Peaks vs Temperature Correlation

This code reads in channel data from pinewood, sorts it into peaks, and then plots the peaks in relation to temperature

This code is calibrated for the Pinewood Datatset. If you want to use this code with another dataset, you must change the peak windows, as each dataset will have slightly different peak locations


In [ ]:
import pandas as pd
from bokeh.plotting import *
import numpy as np
from bokeh.layouts import row
from bokeh.models import LinearAxis, Range1d, LabelSet, Label
import scipy.stats
import math

df = pd.read_csv('pinewood_os_d3s.csv')
df2 = pd.read_csv('pinewood_os_weather.csv')

cols = [0,1,2,3,4]
df1 = df.drop(df.columns[cols],axis = 1)

#-----------------------------------------------------------------------------------------------------------------------

def find_nearest(array, value):

    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return idx

#-----------------------------------------------------------------------------------------------------------------------

def chunk(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

#-----------------------------------------------------------------------------------------------------------------------
# This program bins the data into groups of 10,000 minutes.
# This is accomplished using the chunck method defined above, which splits a dataframe into smaller dataframes of a given size
# In this program, each chunk contains 2000 entries, which amounts to 10,000 minutes

time = []

dft = df[['deviceTime_unix']].copy()
for dft_chunk in chunk(dft,2000):
    time.append(dft_chunk.iloc[0]['deviceTime_unix'])


#-----------------------------------------------------------------------------------------------------------------------
# For Pinewood, it is less important to sync times up because the sensors were install ed at the same time, but 
# nevertheless, it still remains a good idea to keep the time sync around just in case things don't match up

temp = []

df2 = df2[['deviceTime_unix','temperature']].copy()

temptime = df2.as_matrix(columns=df2.columns[0:1]).ravel()
indx = find_nearest(temptime,time[0])
df2 = df2.drop(df2.index[0:indx])

for dftemp_chunk in chunk(df2,2000):
    temp.append(dftemp_chunk["temperature"].mean())

#-----------------------------------------------------------------------------------------------------------------------

i = 0
channels = []

while i <=1023:
    channels.append(i) # Create the array of channel labels
    i= i+1

#-----------------------------------------------------------------------------------------------------------------------

m = 200
bins = []
while m<=2800:
    bins.append(m) # Create array of bin edges
    m =m+5

#-----------------------------------------------------------------------------------------------------------------------

total = [] # Bismuth Counts
totalk = [] # Potassium Counts
totalth = [] # Thallium Counts
totalt = [] # Overall Counts

#-----------------------------------------------------------------------------------------------------------------------

for df_chunk in chunk(df1, 2000):

    buckets = [0] * 1500

    arr = df_chunk.as_matrix(columns=df_chunk.columns[1:])
    conv = df_chunk.as_matrix(columns=df_chunk.columns[0:1]).ravel()

    k = 0

    while k < len(conv):

        kev = [i * conv[k] for i in channels]

        indexes = np.digitize(kev, bins).flatten()

        i = 0
        while i < len(indexes):
            buckets[indexes[i] - 1] = buckets[indexes[i] - 1] + arr[k][i]
            i = i + 1

        k = k + 1

    # -----------------------------------------------------------------------------------------------------------------------
    # In order to remove the background counts in the peaks, we create a line from the endpoints of the window, integrate under that line,
    # and subtract that value from the integral under the peak
    
    bismuthA = buckets[80:100]

    sumBA = (np.trapz(bismuthA, bins[80:100]))

    x1 = bins[80]
    x2 = bins[100]
    y1 = bismuthA[0]
    y2 = bismuthA[len(bismuthA) - 1]

    m = ((math.log(y2) - math.log(y1)) / (x2 - x1))
    c = math.log(y1) - (m * x1)
    A = math.exp(c)

    xarino = np.arange(x1, x2)

    y = (A * np.exp((m * xarino)))

    bismuthbackground = (np.trapz(y, x=xarino))

    # -----------------------------------------------------------------------------------------------------------------------

    potassiumA = buckets[230:270]
    sumKA = (np.trapz(potassiumA, bins[230:270]))

    x1 = bins[230]
    x2 = bins[270]
    y1 = potassiumA[0]
    y2 = potassiumA[len(potassiumA) - 1]

    m = ((math.log(y2) - math.log(y1)) / (x2 - x1))
    c = math.log(y1) - (m * x1)
    A = math.exp(c)

    xarino = np.arange(x1, x2)

    y = (A * np.exp((m * xarino)))

    potassiumbackground = (np.trapz(y, x=xarino))

    # -----------------------------------------------------------------------------------------------------------------------

    thoriumA = buckets[423:450]
    sumTA = (np.trapz(thoriumA, bins[423:450]))

    x1 = bins[423]
    x2 = bins[450]
    y1 = thoriumA[0]
    y2 = thoriumA[len(thoriumA) - 1]

    m = ((math.log(y2)-math.log(y1))/(x2-x1))
    c = math.log(y1) - (m*x1)
    A = math.exp(c)

    xarino = np.arange(x1,x2)

    y = (A*np.exp((m*xarino)))

    thoriumbackground = (np.trapz(y,x=xarino))

    # -----------------------------------------------------------------------------------------------------------------------

    totalt.append(sum(buckets))
    total.append(sumBA-bismuthbackground)
    totalk.append(sumKA-potassiumbackground)
    totalth.append(sumTA-thoriumbackground)

# -----------------------------------------------------------------------------------------------------------------------


print str(scipy.stats.pearsonr(total[0:len(temp)],temp)) + " This is bismuth"
print str(scipy.stats.pearsonr(totalk[0:len(temp)],temp)) + " This is potassium"
print str(scipy.stats.pearsonr(totalth[0:len(temp)],temp)) + " This is thallium"

# -----------------------------------------------------------------------------------------------------------------------

p = figure()
p.circle(total[0:len(temp)],temp)
citation1 = Label(x=70, y=70, x_units='screen', y_units='screen',
                 text=str(scipy.stats.pearsonr(total[0:len(temp)],temp)),
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)
p.add_layout(citation1)



p2 = figure()
p2.circle(totalk[0:len(temp)],temp)

citation2 = Label(x=70, y=70, x_units='screen', y_units='screen',
                 text=str(scipy.stats.pearsonr(totalk[0:len(temp)],temp)),
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)
p2.add_layout(citation2)


p3 = figure()
p3.circle(totalth[0:len(temp)],temp)

citation3 = Label(x=70, y=70, x_units='screen', y_units='screen',
                 text=str(scipy.stats.pearsonr(totalth[0:len(temp)],temp)), render_mode='css',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)
p3.add_layout(citation3)

show(row(p,p2,p3))

# -----------------------------------------------------------------------------------------------------------------------
p = figure()

p.circle(time,total,color = 'firebrick')

p.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p.add_layout(LinearAxis(y_range_name="foo"), 'right')

p.circle(time,temp, y_range_name='foo', color='blue')

# -----------------------------------------------------------------------------------------------------------------------

p3 = figure()
p3.circle(time,totalth,color = 'green')

p3.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p3.add_layout(LinearAxis(y_range_name="foo"), 'right')

p3.circle(time,temp, y_range_name='foo', color='blue')

# -----------------------------------------------------------------------------------------------------------------------

p2 = figure()
p2.circle(time,totalk,color = 'orange')

p2.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p2.add_layout(LinearAxis(y_range_name="foo"), 'right')

p2.circle(time,temp, y_range_name='foo', color='blue')

# -----------------------------------------------------------------------------------------------------------------------

show(row(p,p2,p3))